#!/bin/bash
#BSUB -n 1                     # 1 core
#BSUB -W 3:59                   # run-time limit of 3 h 59 min
#BSUB -R "rusage[mem=8000]"     # 8 GB per core
#BSUB -R "rusage[ngpus_excl_p=1]"
#BSUB -R "select[gpu_mtotal0>=10000]"
#BSUB -o /nfs/iiscratch-zhang.inf.ethz.ch/export/zhang/export/fm/exe_log/gptj.out.%J
#BSUB -e /nfs/iiscratch-zhang.inf.ethz.ch/export/zhang/export/fm/exe_log/gptj.err.%J
# --- Software environment ----------------------------------------------------
# env2lmod is a cluster-provided command; it is run before any `module load`.
env2lmod
module load gcc/6.3.0 cuda/11.0.3   # Compiler and CUDA toolkit modules
module load eth_proxy               # Proxy module for outbound network access
source activate base                # Activate the conda "base" Python environment

# Run from the project checkout: the launch command further down refers to its
# Python entry point by relative path, so abort early if the directory is
# unreachable instead of continuing in the wrong place.
cd /nfs/iiscratch-zhang.inf.ethz.ch/export/zhang/export/fm/GPT-home-private || {
  printf 'error: cannot cd to %s\n' \
    /nfs/iiscratch-zhang.inf.ethz.ch/export/zhang/export/fm/GPT-home-private >&2
  exit 1
}

# --- Diagnostics (captured in the job's stdout log) ---------------------------
# Record which GPU(s) the job can see.
nvidia-smi

# Record the node's network interfaces; useful for checking that the interface
# named in NCCL_SOCKET_IFNAME / GLOO_SOCKET_IFNAME exists on this node.
ifconfig

# --- Network / communication environment --------------------------------------
# Route outbound HTTP(S) traffic through the ETH proxy.
# NOTE(review): https_proxy uses an https:// scheme on the same port (3128) as
# the plain-http proxy URL -- confirm the proxy really accepts TLS there.
export http_proxy=http://proxy.ethz.ch:3128 \
       https_proxy=https://proxy.ethz.ch:3128

# Make both NCCL and Gloo use the network interface named "access".
export NCCL_SOCKET_IFNAME=access GLOO_SOCKET_IFNAME=access

# NCCL tuning: verbose logging, InfiniBand and GPU peer-to-peer transports off.
export NCCL_DEBUG=INFO NCCL_IB_DISABLE=1 NCCL_P2P_DISABLE=1

# --- Launch configuration -----------------------------------------------------
# Number of pipeline stages; passed on as --pipeline-group-size.
world_size=2

# Each *_CONF variable is a plain string holding several command-line words;
# the strings are expanded unquoted at launch time so that they split into
# individual arguments.

# Pipeline-parallel mode, pipeline group size and the CUDA device index.
DIST_CONF="--pp-mode pipe_sync_sample_mask_token_pipe"
DIST_CONF+=" --pipeline-group-size ${world_size} --cuda-id 0"

# Model type, location of the model on disk, and iteration count.
MODEL_CONF="--model-type gptj"
MODEL_CONF+=" --model-name /nfs/iiscratch-zhang.inf.ethz.ch/export/zhang/export/fm/pretrained_models/gpt-j-6B"
MODEL_CONF+=" --num-iters 99999999999"

# Precision, batch sizes, sequence lengths, sampling options and layer count.
INFERENCE_CONF="--fp16 --batch-size 1 --token-micro-batch-size 1"
INFERENCE_CONF+=" --input-seq-length 1024 --generate-seq-length 64"
INFERENCE_CONF+=" --top-p 1 --temperature 0 --num-layers 14"

# IP address of the coordinator server.
COOR_CONF="--coordinator-server-ip 129.132.93.85"

# --- Launch -------------------------------------------------------------------
# `python -u` keeps stdout/stderr unbuffered so output reaches the job logs
# promptly. The command deliberately ends with a line continuation, exactly as
# before. NOTE(review): presumably whatever submits this script appends further
# arguments after this line -- confirm before removing the trailing backslash.
# shellcheck disable=SC2086  # word-splitting of the *_CONF strings is intended
python -u dist_latency_inference_on_euler.py \
  $DIST_CONF \
  $MODEL_CONF \
  $INFERENCE_CONF \
  $COOR_CONF \
